# 1. Descriptive Analysis
# 1.1. Time series plots for all three data sets
#1. tsplot
# One quick-look time series plot per data set: all racial hate crimes,
# anti-Black hate crimes, and anti-Asian hate crimes. No title or axis labels
# are passed, so each figure uses the plotting function's defaults.
# NOTE(review): tsplot() is not defined in this file; presumably astsa::tsplot — confirm.
tsplot(allHC$hate_crime)

tsplot(blackHC$hate_crime)

tsplot(asianHC$hate_crime)

# Shared palette: two semi-transparent RGB colours plus one named colour.
# Position 1 = all races, 2 = anti-Black, 3 = anti-Asian.
culer <- c(
  rgb(.85, .30, .12, .6),
  rgb(.12, .65, .85, .6),
  "aquamarine3"
)

# Overlay the three count series on one set of axes. The y-range runs from the
# smallest anti-Asian count to the largest all-race count so that every line
# fits inside the frame.
tsplot(
  allHC$hate_crime,
  col = culer[1], lwd = 2, pch = 20,
  ylim = c(min(asianHC$hate_crime), max(allHC$hate_crime)),
  ylab = "Hate crimes",
  main = "Racial Hate Crimes"
)
lines(blackHC$hate_crime, col = culer[2], lwd = 2, pch = 20)
lines(asianHC$hate_crime, col = culer[3], lwd = 2, pch = 20)
legend(
  "topleft",
  legend = c("All race", "African American/Black", "Asian"),
  col = culer, lty = 1, lwd = 2, pch = 20, bg = "white"
)

# Same palette as the raw-scale overlay (redefined so this chunk runs alone).
culer <- c(
  rgb(.85, .30, .12, .6),
  rgb(.12, .65, .85, .6),
  "aquamarine3"
)

# Log-scale version of the overlay. The upper y-limit is the largest log count
# plus 1, which leaves extra room at the top of the frame where the legend sits.
tsplot(
  log(allHC$hate_crime),
  col = culer[1], lwd = 2, pch = 20,
  ylim = c(min(log(asianHC$hate_crime)), max(log(allHC$hate_crime) + 1)),
  ylab = "log(Hate crimes)",
  main = "Racial Hate Crimes"
)
lines(log(blackHC$hate_crime), col = culer[2], lwd = 2, pch = 20)
lines(log(asianHC$hate_crime), col = culer[3], lwd = 2, pch = 20)
legend(
  "topleft",
  legend = c("All race", "African American/Black", "Asian"),
  col = culer, lty = 1, lwd = 2, pch = 20, bg = "white"
)

# 2. Time series plots with calendar dates on the x-axis.
# For each data set two xts objects are built from the same counts:
#   * a POSIXct-indexed one, dated to the first day of each month;
#   * a yearmon-indexed one (year + (month - 1) / 12).
# Only the yearmon-indexed series is plotted in this section.

# All racial hate crimes
ts1 <- xts(
  allHC$hate_crime,
  order.by = as.POSIXct(sprintf("%d-%d-01", allHC$year, allHC$month))
)
ts2 <- xts(
  allHC$hate_crime,
  order.by = as.yearmon(allHC$year + (allHC$month - 1) / 12)
)
plot(ts2, main = "All Racial Hate Crimes")

# Anti-Black hate crimes
ts3 <- xts(
  blackHC$hate_crime,
  order.by = as.POSIXct(sprintf("%d-%d-01", blackHC$year, blackHC$month))
)
ts4 <- xts(
  blackHC$hate_crime,
  order.by = as.yearmon(blackHC$year + (blackHC$month - 1) / 12)
)
plot(ts4, main = "Anti-African American or Black Hate Crimes")

# Anti-Asian hate crimes
ts5 <- xts(
  asianHC$hate_crime,
  order.by = as.POSIXct(sprintf("%d-%d-01", asianHC$year, asianHC$month))
)
ts6 <- xts(
  asianHC$hate_crime,
  order.by = as.yearmon(asianHC$year + (asianHC$month - 1) / 12)
)
plot(ts6, main = "Anti-Asian Hate Crimes")

# 1.2. Summary statistics and distribution
# Five-number summary plus mean for every column of each data set, followed by
# the standard deviation of the hate-crime counts. Lines starting with "##"
# are the console output recorded when the document was rendered.

# All racial hate crimes
summary(allHC)
## year month hate_crime
## Min. :1991 Min. : 1.00 Min. : 198.0
## 1st Qu.:1998 1st Qu.: 3.75 1st Qu.: 319.0
## Median :2006 Median : 6.50 Median : 379.0
## Mean :2006 Mean : 6.50 Mean : 389.1
## 3rd Qu.:2013 3rd Qu.: 9.25 3rd Qu.: 448.0
## Max. :2020 Max. :12.00 Max. :1329.0
sd(allHC$hate_crime)
## [1] 112.6868
# Anti-Black hate crimes
summary(blackHC)
## year month hate_crime
## Min. :1991 Min. : 1.00 Min. : 88.0
## 1st Qu.:1998 1st Qu.: 3.75 1st Qu.:167.0
## Median :2006 Median : 6.50 Median :202.0
## Mean :2006 Mean : 6.50 Mean :207.7
## 3rd Qu.:2013 3rd Qu.: 9.25 3rd Qu.:242.5
## Max. :2020 Max. :12.00 Max. :693.0
sd(blackHC$hate_crime)
## [1] 61.32818
# Anti-Asian hate crimes
summary(asianHC)
## year month hate_crime
## Min. :1991 Min. : 1.00 Min. : 3.00
## 1st Qu.:1998 1st Qu.: 3.75 1st Qu.:12.00
## Median :2006 Median : 6.50 Median :17.00
## Mean :2006 Mean : 6.50 Mean :17.84
## 3rd Qu.:2013 3rd Qu.: 9.25 3rd Qu.:23.00
## Max. :2020 Max. :12.00 Max. :51.00
sd(asianHC$hate_crime)
## [1] 8.30969
# Distribution of the counts: one histogram per data set, each filled with the
# palette colour used for that series in the overlay plots.
hist(
  allHC$hate_crime,
  main = "Distribution of All Racial Hate Crimes",
  xlab = "Number of Hate Crimes",
  col = culer[1]
)

hist(
  blackHC$hate_crime,
  main = "Distribution of Anti-Black Racial Hate Crimes",
  xlab = "Number of Hate Crimes",
  col = culer[2]
)

hist(
  asianHC$hate_crime,
  main = "Distribution of Anti-Asian Racial Hate Crimes",
  xlab = "Number of Hate Crimes",
  col = culer[3]
)

# 1.3. Check and describe outliers
# Outlier screen on the first differences of the all-race counts.
# The stored column is padded with a leading 0 so it has one value per row and
# can be plotted against the original index.
allHC$difference <- c(0, diff(allHC$hate_crime))

# Fences follow the 1.5 * IQR rule. Both the quartiles and the IQR are taken
# from the true first differences: the padding 0 is not an observed change, and
# mixing the padded column (quartiles) with the unpadded one (IQR) would build
# the fences from two different samples.
iqr <- IQR(diff(allHC$hate_crime))
Q <- quantile(diff(allHC$hate_crime), probs = c(.25, .75), na.rm = FALSE)
high <- Q[2] + 1.5 * iqr
low <- Q[1] - 1.5 * iqr

tsplot(allHC$difference, main = "Detecting Outliers Using IQR Score: All Racial Hate Crimes", ylab = "Differenced(hate crime)")
# Horizontal dashed lines at the upper and lower fences.
abline(h = high, lty = 2, col = "red")
abline(h = low, lty = 2, col = "red")

# Outlier screen on the first differences of the anti-Black counts.
# The stored column is padded with a leading 0 so it has one value per row and
# can be plotted against the original index.
blackHC$difference <- c(0, diff(blackHC$hate_crime))

# Fences follow the 1.5 * IQR rule. Both the quartiles and the IQR are taken
# from the true first differences: the padding 0 is not an observed change, and
# mixing the padded column (quartiles) with the unpadded one (IQR) would build
# the fences from two different samples.
iqr <- IQR(diff(blackHC$hate_crime))
Q <- quantile(diff(blackHC$hate_crime), probs = c(.25, .75), na.rm = FALSE)
#Qtest<-quantile(blackHC$difference)
#Qtest[4] #3rd quantile
high <- Q[2] + 1.5 * iqr
low <- Q[1] - 1.5 * iqr

tsplot(blackHC$difference, main = "Detecting Outliers Using IQR Score: Anti-Black Hate Crimes", ylab = "Differenced(hate crime)")
# Horizontal dashed lines at the upper and lower fences.
abline(h = high, lty = 2, col = "red")
abline(h = low, lty = 2, col = "red")

#blackHC[blackHC$difference > high, ]
#blackHC[blackHC$difference < low, ]
#outliers=c(which(blackHC$difference > high), which(blackHC$difference < low)) #12 outliers
#blackHC_no_outliers=blackHC[-outliers,]
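#tsplot(blackHC_no_outliers$difference) #with the outliers removed the series looks stationary. In this case, can we exclude the outliers?
#tsplot(diff(blackHC_no_outliers$hate_crime)) #why does this plot look different from the one above? (Here diff() is recomputed after the rows were dropped, so it spans the gaps; the stored difference column was computed before removal.)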
# Outlier screen on the first differences of the anti-Asian counts.
# The stored column is padded with a leading 0 so it has one value per row and
# can be plotted against the original index.
asianHC$difference <- c(0, diff(asianHC$hate_crime))

# Fences follow the 1.5 * IQR rule. Both the quartiles and the IQR are taken
# from the true first differences: the padding 0 is not an observed change, and
# mixing the padded column (quartiles) with the unpadded one (IQR) would build
# the fences from two different samples.
iqr <- IQR(diff(asianHC$hate_crime))
Q <- quantile(diff(asianHC$hate_crime), probs = c(.25, .75), na.rm = FALSE)
high <- Q[2] + 1.5 * iqr
low <- Q[1] - 1.5 * iqr

tsplot(asianHC$difference, main = "Detecting Outliers Using IQR Score: Anti-Asian Hate Crimes", ylab = "Differenced(hate crime)")
# Horizontal dashed lines at the upper and lower fences.
abline(h = high, lty = 2, col = "red")
abline(h = low, lty = 2, col = "red")

#asianHC[asianHC$difference > high, ]
#asianHC[asianHC$difference < low, ]
#outliers=c(which(asianHC$difference > high), which(asianHC$difference < low)) #8 outliers
#asianHC_no_outliers=asianHC[-outliers,]
# 2. Predictive Analysis: stepwise multiple regression
# Fix the RNG state so the random split below is reproducible.
set.seed(123)

# 0. Create train and test data.
# Every row of DF2 is labelled 1 (train) or 2 (test) independently with
# probabilities 0.8 / 0.2, so the split is about 80/20 in expectation rather
# than exactly 80/20.
indices <- sample(2, nrow(DF2), replace = TRUE, prob = c(0.8, 0.2))
train <- DF2[indices == 1, ]
test <- DF2[indices == 2, ]

# Visual overview of the training rows.
plot(train)

# 1. All racial hate crime.
# Full model: every other column enters as a predictor. It is fit on the
# training split only, like the anti-Black and anti-Asian models, because its
# stepwise reduction is later scored on `test`; fitting on all of DF2 would let
# the test rows leak into the model and flatter the hold-out RMSE.
# NOTE: the console output recorded below (and the hold-out RMSE for this
# model) was produced by a fit on DF2 and must be regenerated after this change.
mod_all <- lm(ALL_HC_PER_CAPITA ~ ., data = train)
summary(mod_all)
##
## Call:
## lm(formula = ALL_HC_PER_CAPITA ~ ., data = DF2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.4819 -0.6689 -0.0805 0.4016 4.9962
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.285e+01 1.093e+01 -1.176 0.247
## MEDIAN_INCOME 3.147e-05 8.202e-05 0.384 0.703
## POP -5.437e-08 4.100e-08 -1.326 0.193
## WHITE_PER_CAPITA 5.361e-02 6.463e-02 0.829 0.412
## BLACK_PER_CAPITA 5.489e-02 5.012e-02 1.095 0.280
## ASIAN_PER_CAPITA 8.057e-02 1.149e-01 0.701 0.487
## FOREIGN_BORN_NON_US_RATIO 2.258e-02 1.388e-01 0.163 0.872
## POVERTY_PERCENT 1.861e-01 2.527e-01 0.736 0.466
## UNEMPLOYMENT_RATE 3.379e-02 3.034e-01 0.111 0.912
## SCHOOL_ENROLLMENT_RATE 8.111e-02 1.557e-01 0.521 0.605
## BACHELOR_RATE 8.163e-02 9.918e-02 0.823 0.416
## BLACK_HC_PER_CAPITA_POP 2.778e+00 1.688e-01 16.459 <2e-16 ***
## ASIAN_HC_PER_CAPITA_POP -1.433e+00 1.119e+00 -1.281 0.208
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.279 on 37 degrees of freedom
## Multiple R-squared: 0.9859, Adjusted R-squared: 0.9814
## F-statistic: 216.1 on 12 and 37 DF, p-value: < 2.2e-16
vif(mod_all)
## MEDIAN_INCOME POP WHITE_PER_CAPITA
## 19.605504 2.647671 20.438757
## BLACK_PER_CAPITA ASIAN_PER_CAPITA FOREIGN_BORN_NON_US_RATIO
## 6.828188 12.186095 4.732608
## POVERTY_PERCENT UNEMPLOYMENT_RATE SCHOOL_ENROLLMENT_RATE
## 16.153629 3.965255 1.915081
## BACHELOR_RATE BLACK_HC_PER_CAPITA_POP ASIAN_HC_PER_CAPITA_POP
## 4.598925 10.937433 10.043324
# Backward elimination starting from the full all-race model; trace = 0
# suppresses the step-by-step log.
mod_all_back <- step(mod_all, direction = "backward", trace = 0)

# Residual diagnostics for the reduced model on a 2 x 2 plotting grid.
par(mfrow = c(2, 2))
plot(mod_all_back)

summary(mod_all_back)
##
## Call:
## lm(formula = ALL_HC_PER_CAPITA ~ BLACK_HC_PER_CAPITA_POP, data = DF2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.4573 -0.4753 -0.2354 0.0896 5.4445
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.37784 0.20497 1.843 0.0714 .
## BLACK_HC_PER_CAPITA_POP 2.59601 0.04874 53.262 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.221 on 48 degrees of freedom
## Multiple R-squared: 0.9834, Adjusted R-squared: 0.983
## F-statistic: 2837 on 1 and 48 DF, p-value: < 2.2e-16
#vif(mod_all_back)
# Model without hate crime variables: the anti-Black and anti-Asian rates are
# removed from the predictor set, leaving the 10 demographic/economic columns.
# It is fit on the training split only, like the anti-Black and anti-Asian
# models, because its stepwise reduction is later scored on `test`; fitting on
# all of DF2 would let the test rows leak into the model.
# NOTE: the console output recorded below (adjusted R-squared 0.3245 for the
# DF2 fit) and the hold-out RMSE for this model must be regenerated after this
# change.
mod_all2 <- lm(ALL_HC_PER_CAPITA ~ . - BLACK_HC_PER_CAPITA_POP - ASIAN_HC_PER_CAPITA_POP, data = train)
summary(mod_all2)
##
## Call:
## lm(formula = ALL_HC_PER_CAPITA ~ . - BLACK_HC_PER_CAPITA_POP -
## ASIAN_HC_PER_CAPITA_POP, data = DF2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.497 -4.023 -0.849 1.866 31.907
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.671e+02 6.050e+01 -2.761 0.00873 **
## MEDIAN_INCOME 9.784e-04 4.693e-04 2.085 0.04370 *
## POP 2.214e-07 2.333e-07 0.949 0.34848
## WHITE_PER_CAPITA 8.272e-01 3.641e-01 2.272 0.02867 *
## BLACK_PER_CAPITA 4.926e-01 2.925e-01 1.684 0.10014
## ASIAN_PER_CAPITA 1.128e+00 6.568e-01 1.717 0.09383 .
## FOREIGN_BORN_NON_US_RATIO 6.640e-01 8.027e-01 0.827 0.41315
## POVERTY_PERCENT 3.247e+00 1.438e+00 2.258 0.02959 *
## UNEMPLOYMENT_RATE 1.202e-01 1.802e+00 0.067 0.94718
## SCHOOL_ENROLLMENT_RATE -4.540e-01 9.282e-01 -0.489 0.62751
## BACHELOR_RATE 1.538e-01 5.960e-01 0.258 0.79777
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.701 on 39 degrees of freedom
## Multiple R-squared: 0.4624, Adjusted R-squared: 0.3245
## F-statistic: 3.354 on 10 and 39 DF, p-value: 0.003123
vif(mod_all2)
## MEDIAN_INCOME POP WHITE_PER_CAPITA
## 17.704275 2.364849 17.888133
## BLACK_PER_CAPITA ASIAN_PER_CAPITA FOREIGN_BORN_NON_US_RATIO
## 6.414012 10.986843 4.365748
## POVERTY_PERCENT UNEMPLOYMENT_RATE SCHOOL_ENROLLMENT_RATE
## 14.419097 3.858263 1.877929
## BACHELOR_RATE
## 4.579832
# Backward elimination starting from the all-race model that excludes the
# other hate-crime rates; trace = 0 suppresses the step-by-step log.
mod_all_back2 <- step(mod_all2, direction = "backward", trace = 0)

# Residual diagnostics for the reduced model on a 2 x 2 plotting grid.
par(mfrow = c(2, 2))
plot(mod_all_back2)

summary(mod_all_back2)
##
## Call:
## lm(formula = ALL_HC_PER_CAPITA ~ MEDIAN_INCOME + WHITE_PER_CAPITA +
## BLACK_PER_CAPITA + ASIAN_PER_CAPITA + FOREIGN_BORN_NON_US_RATIO +
## POVERTY_PERCENT, data = DF2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.117 -4.083 -1.195 1.683 30.704
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.996e+02 5.093e+01 -3.919 0.000314 ***
## MEDIAN_INCOME 1.037e-03 3.369e-04 3.077 0.003633 **
## WHITE_PER_CAPITA 1.014e+00 3.012e-01 3.367 0.001610 **
## BLACK_PER_CAPITA 6.411e-01 2.412e-01 2.658 0.011000 *
## ASIAN_PER_CAPITA 1.438e+00 5.553e-01 2.590 0.013043 *
## FOREIGN_BORN_NON_US_RATIO 1.100e+00 5.407e-01 2.034 0.048106 *
## POVERTY_PERCENT 3.541e+00 1.103e+00 3.211 0.002503 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.462 on 43 degrees of freedom
## Multiple R-squared: 0.4435, Adjusted R-squared: 0.3658
## F-statistic: 5.71 on 6 and 43 DF, p-value: 0.0001961
vif(mod_all_back2)
## MEDIAN_INCOME WHITE_PER_CAPITA BLACK_PER_CAPITA
## 9.714941 13.039657 4.646942
## ASIAN_PER_CAPITA FOREIGN_BORN_NON_US_RATIO POVERTY_PERCENT
## 8.362728 2.109388 9.032896
# 2. Anti-Black hate crime.
# Full model on the training split: every other column enters as a predictor,
# including the all-race and anti-Asian hate-crime rates.
mod_black <- lm(BLACK_HC_PER_CAPITA_POP ~ ., data = train)
summary(mod_black)
##
## Call:
## lm(formula = BLACK_HC_PER_CAPITA_POP ~ ., data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.46460 -0.15708 0.00272 0.19000 0.85507
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.645e+00 4.422e+00 0.824 0.4173
## MEDIAN_INCOME 3.169e-06 3.485e-05 0.091 0.9282
## POP 6.587e-09 1.908e-08 0.345 0.7327
## WHITE_PER_CAPITA 3.184e-03 2.491e-02 0.128 0.8993
## BLACK_PER_CAPITA 7.076e-03 2.255e-02 0.314 0.7562
## ASIAN_PER_CAPITA -6.421e-02 9.880e-02 -0.650 0.5215
## FOREIGN_BORN_NON_US_RATIO 3.076e-02 6.323e-02 0.487 0.6306
## POVERTY_PERCENT -1.086e-01 1.102e-01 -0.985 0.3338
## UNEMPLOYMENT_RATE 2.343e-02 1.152e-01 0.203 0.8404
## SCHOOL_ENROLLMENT_RATE -6.872e-02 6.388e-02 -1.076 0.2919
## BACHELOR_RATE -5.457e-02 4.405e-02 -1.239 0.2265
## ALL_HC_PER_CAPITA 3.287e-01 2.184e-02 15.050 2.38e-14 ***
## ASIAN_HC_PER_CAPITA_POP 1.007e+00 3.835e-01 2.624 0.0143 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4413 on 26 degrees of freedom
## Multiple R-squared: 0.9886, Adjusted R-squared: 0.9834
## F-statistic: 188.3 on 12 and 26 DF, p-value: < 2.2e-16
vif(mod_black)
## MEDIAN_INCOME POP WHITE_PER_CAPITA
## 17.682850 2.216319 11.184198
## BLACK_PER_CAPITA ASIAN_PER_CAPITA FOREIGN_BORN_NON_US_RATIO
## 7.437974 8.553221 5.231084
## POVERTY_PERCENT UNEMPLOYMENT_RATE SCHOOL_ENROLLMENT_RATE
## 15.839536 3.760373 2.392602
## BACHELOR_RATE ALL_HC_PER_CAPITA ASIAN_HC_PER_CAPITA_POP
## 4.918854 7.777060 6.736593
# Backward elimination starting from the full anti-Black model; trace = 0
# suppresses the step-by-step log.
mod_black_back <- step(mod_black, direction = "backward", trace = 0)

# Residual diagnostics for the reduced model on a 2 x 2 plotting grid.
par(mfrow = c(2, 2))
plot(mod_black_back)

summary(mod_black_back)
##
## Call:
## lm(formula = BLACK_HC_PER_CAPITA_POP ~ ALL_HC_PER_CAPITA + ASIAN_HC_PER_CAPITA_POP,
## data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.52642 -0.08406 0.02117 0.10565 0.97951
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.04600 0.07958 -0.578 0.56686
## ALL_HC_PER_CAPITA 0.31949 0.01660 19.241 < 2e-16 ***
## ASIAN_HC_PER_CAPITA_POP 1.07965 0.31329 3.446 0.00146 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4014 on 36 degrees of freedom
## Multiple R-squared: 0.987, Adjusted R-squared: 0.9862
## F-statistic: 1364 on 2 and 36 DF, p-value: < 2.2e-16
vif(mod_black_back)
## ALL_HC_PER_CAPITA ASIAN_HC_PER_CAPITA_POP
## 5.43352 5.43352
# Model without hate crime variables: the all-race and anti-Asian rates are
# removed from the predictor set, leaving the 10 demographic/economic columns.
mod_black2 <- lm(
  BLACK_HC_PER_CAPITA_POP ~ . - ALL_HC_PER_CAPITA - ASIAN_HC_PER_CAPITA_POP,
  data = train
)
summary(mod_black2)
##
## Call:
## lm(formula = BLACK_HC_PER_CAPITA_POP ~ . - ALL_HC_PER_CAPITA -
## ASIAN_HC_PER_CAPITA_POP, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.6105 -1.7486 -0.2017 0.8917 10.3159
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.435e+01 2.551e+01 -2.523 0.0176 *
## MEDIAN_INCOME 3.095e-04 2.224e-04 1.391 0.1750
## POP 7.469e-08 1.229e-07 0.608 0.5483
## WHITE_PER_CAPITA 3.062e-01 1.515e-01 2.021 0.0529 .
## BLACK_PER_CAPITA 1.942e-01 1.445e-01 1.344 0.1897
## ASIAN_PER_CAPITA 1.416e+00 5.645e-01 2.508 0.0182 *
## FOREIGN_BORN_NON_US_RATIO -1.962e-01 3.940e-01 -0.498 0.6224
## POVERTY_PERCENT 1.355e+00 6.672e-01 2.031 0.0518 .
## UNEMPLOYMENT_RATE -3.618e-01 7.536e-01 -0.480 0.6348
## SCHOOL_ENROLLMENT_RATE -1.152e-02 4.136e-01 -0.028 0.9780
## BACHELOR_RATE 9.194e-02 2.876e-01 0.320 0.7516
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.915 on 28 degrees of freedom
## Multiple R-squared: 0.4654, Adjusted R-squared: 0.2745
## F-statistic: 2.438 on 10 and 28 DF, p-value: 0.0309
vif(mod_black2)
## MEDIAN_INCOME POP WHITE_PER_CAPITA
## 16.501417 2.107509 9.480899
## BLACK_PER_CAPITA ASIAN_PER_CAPITA FOREIGN_BORN_NON_US_RATIO
## 6.996064 6.399358 4.655469
## POVERTY_PERCENT UNEMPLOYMENT_RATE SCHOOL_ENROLLMENT_RATE
## 13.298666 3.688606 2.298976
## BACHELOR_RATE
## 4.804371
# Backward elimination starting from the anti-Black model that excludes the
# other hate-crime rates; trace = 0 suppresses the step-by-step log.
mod_black_back2 <- step(mod_black2, direction = "backward", trace = 0)

# Residual diagnostics for the reduced model on a 2 x 2 plotting grid.
par(mfrow = c(2, 2))
plot(mod_black_back2)

summary(mod_black_back2)
##
## Call:
## lm(formula = BLACK_HC_PER_CAPITA_POP ~ MEDIAN_INCOME + WHITE_PER_CAPITA +
## BLACK_PER_CAPITA + ASIAN_PER_CAPITA + POVERTY_PERCENT, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.4842 -1.4136 -0.3367 0.9879 10.2929
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.622e+01 2.181e+01 -3.036 0.00466 **
## MEDIAN_INCOME 3.000e-04 1.615e-04 1.858 0.07217 .
## WHITE_PER_CAPITA 3.595e-01 1.209e-01 2.973 0.00547 **
## BLACK_PER_CAPITA 2.502e-01 1.055e-01 2.372 0.02370 *
## ASIAN_PER_CAPITA 1.326e+00 4.040e-01 3.283 0.00243 **
## POVERTY_PERCENT 1.147e+00 5.038e-01 2.277 0.02937 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.723 on 33 degrees of freedom
## Multiple R-squared: 0.4504, Adjusted R-squared: 0.3672
## F-statistic: 5.41 on 5 and 33 DF, p-value: 0.0009665
vif(mod_black_back2)
## MEDIAN_INCOME WHITE_PER_CAPITA BLACK_PER_CAPITA ASIAN_PER_CAPITA
## 9.977125 6.922026 4.275537 3.757292
## POVERTY_PERCENT
## 8.691638
# 3. Anti-Asian hate crime.
# Full model on the training split: every other column enters as a predictor,
# including the all-race and anti-Black hate-crime rates.
mod_asian <- lm(ASIAN_HC_PER_CAPITA_POP ~ ., data = train)
summary(mod_asian)
##
## Call:
## lm(formula = ASIAN_HC_PER_CAPITA_POP ~ ., data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.64048 -0.05295 0.00483 0.07283 0.46802
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.507e-01 2.031e+00 0.370 0.7147
## MEDIAN_INCOME 4.104e-06 1.583e-05 0.259 0.7974
## POP -8.752e-09 8.524e-09 -1.027 0.3140
## WHITE_PER_CAPITA -7.944e-03 1.122e-02 -0.708 0.4853
## BLACK_PER_CAPITA -2.615e-03 1.026e-02 -0.255 0.8008
## ASIAN_PER_CAPITA -3.141e-02 4.486e-02 -0.700 0.4901
## FOREIGN_BORN_NON_US_RATIO 3.351e-02 2.812e-02 1.192 0.2441
## POVERTY_PERCENT 3.448e-03 5.104e-02 0.068 0.9467
## UNEMPLOYMENT_RATE -2.997e-02 5.207e-02 -0.575 0.5699
## SCHOOL_ENROLLMENT_RATE -1.119e-02 2.960e-02 -0.378 0.7084
## BACHELOR_RATE 1.294e-03 2.061e-02 0.063 0.9504
## ALL_HC_PER_CAPITA -2.946e-02 3.040e-02 -0.969 0.3414
## BLACK_HC_PER_CAPITA_POP 2.081e-01 7.928e-02 2.624 0.0143 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2006 on 26 degrees of freedom
## Multiple R-squared: 0.8826, Adjusted R-squared: 0.8285
## F-statistic: 16.3 on 12 and 26 DF, p-value: 3.856e-09
vif(mod_asian)
## MEDIAN_INCOME POP WHITE_PER_CAPITA
## 17.642846 2.139721 10.979608
## BLACK_PER_CAPITA ASIAN_PER_CAPITA FOREIGN_BORN_NON_US_RATIO
## 7.447525 8.531338 5.005225
## POVERTY_PERCENT UNEMPLOYMENT_RATE SCHOOL_ENROLLMENT_RATE
## 16.427579 3.718994 2.485430
## BACHELOR_RATE ALL_HC_PER_CAPITA BLACK_HC_PER_CAPITA_POP
## 5.208384 72.894602 69.498326
# Backward elimination starting from the full anti-Asian model; trace = 0
# suppresses the step-by-step log.
mod_asian_back <- step(mod_asian, direction = "backward", trace = 0)

# Residual diagnostics for the reduced model on a 2 x 2 plotting grid.
par(mfrow = c(2, 2))
plot(mod_asian_back)

summary(mod_asian_back)
##
## Call:
## lm(formula = ASIAN_HC_PER_CAPITA_POP ~ POP + FOREIGN_BORN_NON_US_RATIO +
## ALL_HC_PER_CAPITA + BLACK_HC_PER_CAPITA_POP, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.66987 -0.05725 0.02294 0.06072 0.44670
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.000e-01 5.848e-02 -1.711 0.09624 .
## POP -8.618e-09 6.357e-09 -1.356 0.18413
## FOREIGN_BORN_NON_US_RATIO 2.804e-02 1.413e-02 1.984 0.05542 .
## ALL_HC_PER_CAPITA -3.377e-02 2.440e-02 -1.384 0.17531
## BLACK_HC_PER_CAPITA_POP 2.161e-01 6.517e-02 3.316 0.00218 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.18 on 34 degrees of freedom
## Multiple R-squared: 0.8765, Adjusted R-squared: 0.862
## F-statistic: 60.35 on 4 and 34 DF, p-value: 5.722e-15
vif(mod_asian_back)
## POP FOREIGN_BORN_NON_US_RATIO ALL_HC_PER_CAPITA
## 1.479044 1.571992 58.347647
## BLACK_HC_PER_CAPITA_POP
## 58.369345
# Model without hate crime variables: the all-race and anti-Black rates are
# removed from the predictor set, leaving the 10 demographic/economic columns.
mod_asian2 <- lm(
  ASIAN_HC_PER_CAPITA_POP ~ . - ALL_HC_PER_CAPITA - BLACK_HC_PER_CAPITA_POP,
  data = train
)
summary(mod_asian2)
##
## Call:
## lm(formula = ASIAN_HC_PER_CAPITA_POP ~ . - ALL_HC_PER_CAPITA -
## BLACK_HC_PER_CAPITA_POP, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.49454 -0.23939 -0.00395 0.09092 1.71530
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.193e+00 3.788e+00 -1.899 0.0679 .
## MEDIAN_INCOME 4.511e-05 3.303e-05 1.366 0.1829
## POP 7.520e-10 1.825e-08 0.041 0.9674
## WHITE_PER_CAPITA 3.145e-02 2.250e-02 1.398 0.1732
## BLACK_PER_CAPITA 2.311e-02 2.146e-02 1.077 0.2908
## ASIAN_PER_CAPITA 1.434e-01 8.384e-02 1.711 0.0982 .
## FOREIGN_BORN_NON_US_RATIO 1.433e-02 5.851e-02 0.245 0.8083
## POVERTY_PERCENT 1.695e-01 9.909e-02 1.711 0.0981 .
## UNEMPLOYMENT_RATE -7.773e-02 1.119e-01 -0.695 0.4930
## SCHOOL_ENROLLMENT_RATE -2.057e-02 6.143e-02 -0.335 0.7402
## BACHELOR_RATE 8.014e-03 4.271e-02 0.188 0.8525
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4329 on 28 degrees of freedom
## Multiple R-squared: 0.4116, Adjusted R-squared: 0.2014
## F-statistic: 1.959 on 10 and 28 DF, p-value: 0.07867
vif(mod_asian2)
## MEDIAN_INCOME POP WHITE_PER_CAPITA
## 16.501417 2.107509 9.480899
## BLACK_PER_CAPITA ASIAN_PER_CAPITA FOREIGN_BORN_NON_US_RATIO
## 6.996064 6.399358 4.655469
## POVERTY_PERCENT UNEMPLOYMENT_RATE SCHOOL_ENROLLMENT_RATE
## 13.298666 3.688606 2.298976
## BACHELOR_RATE
## 4.804371
# Backward elimination starting from the anti-Asian model that excludes the
# other hate-crime rates; trace = 0 suppresses the step-by-step log.
mod_asian_back2 <- step(mod_asian2, direction = "backward", trace = 0)

# Residual diagnostics for the reduced model on a 2 x 2 plotting grid.
par(mfrow = c(2, 2))
plot(mod_asian_back2)

summary(mod_asian_back2)
##
## Call:
## lm(formula = ASIAN_HC_PER_CAPITA_POP ~ MEDIAN_INCOME + WHITE_PER_CAPITA +
## BLACK_PER_CAPITA + ASIAN_PER_CAPITA + POVERTY_PERCENT, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.46558 -0.25852 -0.00099 0.16308 1.74991
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.555e+00 3.236e+00 -2.334 0.0258 *
## MEDIAN_INCOME 3.961e-05 2.396e-05 1.653 0.1078
## WHITE_PER_CAPITA 3.676e-02 1.794e-02 2.049 0.0485 *
## BLACK_PER_CAPITA 2.598e-02 1.565e-02 1.660 0.1064
## ASIAN_PER_CAPITA 1.556e-01 5.994e-02 2.596 0.0140 *
## POVERTY_PERCENT 1.319e-01 7.474e-02 1.765 0.0869 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4039 on 33 degrees of freedom
## Multiple R-squared: 0.3963, Adjusted R-squared: 0.3048
## F-statistic: 4.332 on 5 and 33 DF, p-value: 0.003856
vif(mod_asian_back2)
## MEDIAN_INCOME WHITE_PER_CAPITA BLACK_PER_CAPITA ASIAN_PER_CAPITA
## 9.977125 6.922026 4.275537 3.757292
## POVERTY_PERCENT
## 8.691638
# 3. Predictive Analysis: k nearest neighbors
# Fix the RNG state so the cross-validation folds are reproducible.
set.seed(123)

# 1. All racial hate crime.
# Let caret search for the best k using 10-fold cross-validation. The same
# control object is reused by the anti-Black and anti-Asian fits further down.
# (`train` names both the caret function and the training data frame; R picks
# the function when it is called and the data frame when passed as `data`.)
trctrl <- trainControl(method = "cv", number = 10)

# tuneLength = 50 tries 50 values of k (the recorded grid is 5, 7, ..., 103).
# NOTE(review): the recorded output reports "No pre-processing" and identical
# scores for every k >= 37 on 39 samples — consider centring/scaling the
# predictors and capping k below the fold size.
simple_fit3 <- train(
  ALL_HC_PER_CAPITA ~ .,
  data = train,
  method = "knn",
  trControl = trctrl,
  tuneLength = 50
)
plot(simple_fit3)

simple_fit3
## k-Nearest Neighbors
##
## 39 samples
## 12 predictors
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 35, 35, 35, 35, 35, 35, ...
## Resampling results across tuning parameters:
##
## k RMSE Rsquared MAE
## 5 6.322412 0.4237717 4.551636
## 7 6.420670 0.4056259 4.601590
## 9 6.607362 0.3613087 4.776129
## 11 6.307716 0.3844472 4.477374
## 13 6.180925 0.4416995 4.267641
## 15 6.152784 0.4458449 4.209575
## 17 6.156900 0.4387926 4.297701
## 19 6.010273 0.4613615 4.230657
## 21 5.953789 0.4649601 4.242796
## 23 5.963167 0.4642294 4.232403
## 25 6.044545 0.4504058 4.313037
## 27 6.479173 0.4084240 4.832036
## 29 6.933877 0.4108113 5.363944
## 31 7.064728 0.4432337 5.492342
## 33 7.050698 0.3983654 5.456585
## 35 7.065644 0.4098173 5.471668
## 37 7.062176 0.4098173 5.475572
## 39 7.062176 0.4098173 5.475572
## 41 7.062176 0.4098173 5.475572
## 43 7.062176 0.4098173 5.475572
## 45 7.062176 0.4098173 5.475572
## 47 7.062176 0.4098173 5.475572
## 49 7.062176 0.4098173 5.475572
## 51 7.062176 0.4098173 5.475572
## 53 7.062176 0.4098173 5.475572
## 55 7.062176 0.4098173 5.475572
## 57 7.062176 0.4098173 5.475572
## 59 7.062176 0.4098173 5.475572
## 61 7.062176 0.4098173 5.475572
## 63 7.062176 0.4098173 5.475572
## 65 7.062176 0.4098173 5.475572
## 67 7.062176 0.4098173 5.475572
## 69 7.062176 0.4098173 5.475572
## 71 7.062176 0.4098173 5.475572
## 73 7.062176 0.4098173 5.475572
## 75 7.062176 0.4098173 5.475572
## 77 7.062176 0.4098173 5.475572
## 79 7.062176 0.4098173 5.475572
## 81 7.062176 0.4098173 5.475572
## 83 7.062176 0.4098173 5.475572
## 85 7.062176 0.4098173 5.475572
## 87 7.062176 0.4098173 5.475572
## 89 7.062176 0.4098173 5.475572
## 91 7.062176 0.4098173 5.475572
## 93 7.062176 0.4098173 5.475572
## 95 7.062176 0.4098173 5.475572
## 97 7.062176 0.4098173 5.475572
## 99 7.062176 0.4098173 5.475572
## 101 7.062176 0.4098173 5.475572
## 103 7.062176 0.4098173 5.475572
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was k = 21.
# 2. Anti-Black hate crime.
# Same kNN search as for the all-race rate: 10-fold CV via `trctrl`, 50
# candidate values of k.
# NOTE(review): the recorded output reports "No pre-processing" and identical
# scores for every k >= 37 on 39 samples — consider centring/scaling the
# predictors and capping k below the fold size.
simple_fit4 <- train(
  BLACK_HC_PER_CAPITA_POP ~ .,
  data = train,
  method = "knn",
  trControl = trctrl,
  tuneLength = 50
)
plot(simple_fit4)

simple_fit4
## k-Nearest Neighbors
##
## 39 samples
## 12 predictors
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 35, 35, 35, 35, 35, 35, ...
## Resampling results across tuning parameters:
##
## k RMSE Rsquared MAE
## 5 2.644446 0.4748750 1.795815
## 7 2.422895 0.5830589 1.732380
## 9 2.360427 0.6349595 1.722547
## 11 2.253279 0.6551436 1.621244
## 13 2.166296 0.6180502 1.509124
## 15 2.173714 0.5877432 1.528599
## 17 2.151263 0.5910766 1.499530
## 19 2.156769 0.5669521 1.531792
## 21 2.127495 0.5865246 1.534167
## 23 2.136914 0.5696179 1.535365
## 25 2.126744 0.5911237 1.533855
## 27 2.288571 0.5784928 1.675899
## 29 2.458393 0.5898117 1.866354
## 31 2.501090 0.6271986 1.883699
## 33 2.494183 0.5065472 1.875172
## 35 2.503459 0.3695950 1.880769
## 37 2.502700 0.3695950 1.882446
## 39 2.502700 0.3695950 1.882446
## 41 2.502700 0.3695950 1.882446
## 43 2.502700 0.3695950 1.882446
## 45 2.502700 0.3695950 1.882446
## 47 2.502700 0.3695950 1.882446
## 49 2.502700 0.3695950 1.882446
## 51 2.502700 0.3695950 1.882446
## 53 2.502700 0.3695950 1.882446
## 55 2.502700 0.3695950 1.882446
## 57 2.502700 0.3695950 1.882446
## 59 2.502700 0.3695950 1.882446
## 61 2.502700 0.3695950 1.882446
## 63 2.502700 0.3695950 1.882446
## 65 2.502700 0.3695950 1.882446
## 67 2.502700 0.3695950 1.882446
## 69 2.502700 0.3695950 1.882446
## 71 2.502700 0.3695950 1.882446
## 73 2.502700 0.3695950 1.882446
## 75 2.502700 0.3695950 1.882446
## 77 2.502700 0.3695950 1.882446
## 79 2.502700 0.3695950 1.882446
## 81 2.502700 0.3695950 1.882446
## 83 2.502700 0.3695950 1.882446
## 85 2.502700 0.3695950 1.882446
## 87 2.502700 0.3695950 1.882446
## 89 2.502700 0.3695950 1.882446
## 91 2.502700 0.3695950 1.882446
## 93 2.502700 0.3695950 1.882446
## 95 2.502700 0.3695950 1.882446
## 97 2.502700 0.3695950 1.882446
## 99 2.502700 0.3695950 1.882446
## 101 2.502700 0.3695950 1.882446
## 103 2.502700 0.3695950 1.882446
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was k = 25.
# 3. Anti-Asian hate crime.
# Same kNN search as for the all-race rate: 10-fold CV via `trctrl`, 50
# candidate values of k.
# NOTE(review): the recorded output reports "No pre-processing" and identical
# scores for every k >= 37 on 39 samples — consider centring/scaling the
# predictors and capping k below the fold size.
simple_fit5 <- train(
  ASIAN_HC_PER_CAPITA_POP ~ .,
  data = train,
  method = "knn",
  trControl = trctrl,
  tuneLength = 50
)
plot(simple_fit5)

simple_fit5
## k-Nearest Neighbors
##
## 39 samples
## 12 predictors
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 35, 35, 35, 35, 35, 35, ...
## Resampling results across tuning parameters:
##
## k RMSE Rsquared MAE
## 5 0.3691158 0.5747832 0.2394554
## 7 0.3289058 0.6438099 0.2202018
## 9 0.3028478 0.6967459 0.2024725
## 11 0.2880529 0.6554908 0.2007762
## 13 0.2743234 0.6403317 0.1896897
## 15 0.2696925 0.5931313 0.1883349
## 17 0.2627104 0.5862949 0.1849407
## 19 0.2612330 0.5943251 0.1800576
## 21 0.2542633 0.5985920 0.1741475
## 23 0.2535920 0.5562669 0.1729369
## 25 0.2528757 0.5025716 0.1751082
## 27 0.2786101 0.5570465 0.2011020
## 29 0.2968264 0.5488665 0.2182805
## 31 0.3017404 0.5206127 0.2234350
## 33 0.3006905 0.4938958 0.2219397
## 35 0.3015118 0.4560191 0.2232957
## 37 0.3016995 0.4560191 0.2234949
## 39 0.3016995 0.4560191 0.2234949
## 41 0.3016995 0.4560191 0.2234949
## 43 0.3016995 0.4560191 0.2234949
## 45 0.3016995 0.4560191 0.2234949
## 47 0.3016995 0.4560191 0.2234949
## 49 0.3016995 0.4560191 0.2234949
## 51 0.3016995 0.4560191 0.2234949
## 53 0.3016995 0.4560191 0.2234949
## 55 0.3016995 0.4560191 0.2234949
## 57 0.3016995 0.4560191 0.2234949
## 59 0.3016995 0.4560191 0.2234949
## 61 0.3016995 0.4560191 0.2234949
## 63 0.3016995 0.4560191 0.2234949
## 65 0.3016995 0.4560191 0.2234949
## 67 0.3016995 0.4560191 0.2234949
## 69 0.3016995 0.4560191 0.2234949
## 71 0.3016995 0.4560191 0.2234949
## 73 0.3016995 0.4560191 0.2234949
## 75 0.3016995 0.4560191 0.2234949
## 77 0.3016995 0.4560191 0.2234949
## 79 0.3016995 0.4560191 0.2234949
## 81 0.3016995 0.4560191 0.2234949
## 83 0.3016995 0.4560191 0.2234949
## 85 0.3016995 0.4560191 0.2234949
## 87 0.3016995 0.4560191 0.2234949
## 89 0.3016995 0.4560191 0.2234949
## 91 0.3016995 0.4560191 0.2234949
## 93 0.3016995 0.4560191 0.2234949
## 95 0.3016995 0.4560191 0.2234949
## 97 0.3016995 0.4560191 0.2234949
## 99 0.3016995 0.4560191 0.2234949
## 101 0.3016995 0.4560191 0.2234949
## 103 0.3016995 0.4560191 0.2234949
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was k = 25.
# 4. Cross validation using test data
# 4.1. Calculate RMSE and normalized RMSE
set.seed(123)
# 1. All racial hate crime: stepwise regression started from 12 predictors.
# Predict the held-out rows, then report the RMSE and the RMSE divided by the
# observed range of the response in the test set (normalized RMSE).
test_pred4 <- predict(mod_all_back, newdata = test)
test_pred_df4 <- as.data.frame(test_pred4)
RMSE_4 <- sqrt(mean((test[["ALL_HC_PER_CAPITA"]] - test_pred_df4[["test_pred4"]])^2))
N_RMSE_4 <- RMSE_4 / diff(range(test[["ALL_HC_PER_CAPITA"]]))
round(RMSE_4, digits = 3)
## [1] 1.156
round(N_RMSE_4, digits = 3)
## [1] 0.037
# 2. All racial hate crime: stepwise regression started from 10 predictors
#    (other hate-crime rates excluded).
# RMSE on the held-out rows and RMSE normalized by the test-set response range.
test_pred4.2 <- predict(mod_all_back2, newdata = test)
test_pred_df4.2 <- as.data.frame(test_pred4.2)
RMSE_4.2 <- sqrt(mean((test[["ALL_HC_PER_CAPITA"]] - test_pred_df4.2[["test_pred4.2"]])^2))
N_RMSE_4.2 <- RMSE_4.2 / diff(range(test[["ALL_HC_PER_CAPITA"]]))
round(RMSE_4.2, digits = 3)
## [1] 5.95
round(N_RMSE_4.2, digits = 3)
## [1] 0.191
# 3. Anti-Black hate crime: stepwise regression started from 12 predictors.
# RMSE on the held-out rows and RMSE normalized by the test-set response range.
test_pred5 <- predict(mod_black_back, newdata = test)
test_pred_df5 <- as.data.frame(test_pred5)
RMSE_5 <- sqrt(mean((test[["BLACK_HC_PER_CAPITA_POP"]] - test_pred_df5[["test_pred5"]])^2))
N_RMSE_5 <- RMSE_5 / diff(range(test[["BLACK_HC_PER_CAPITA_POP"]]))
round(RMSE_5, digits = 3)
## [1] 0.49
round(N_RMSE_5, digits = 3)
## [1] 0.04
# 4. Anti-Black hate crime: stepwise regression started from 10 predictors
#    (other hate-crime rates excluded).
# RMSE on the held-out rows and RMSE normalized by the test-set response range.
test_pred5.2 <- predict(mod_black_back2, newdata = test)
test_pred_df5.2 <- as.data.frame(test_pred5.2)
# The prediction column is addressed by its full name, "test_pred5.2".
# Writing `$test_pred5` only works through `$` partial matching and would
# silently pick up a different column if one named `test_pred5` ever existed.
RMSE_5.2 <- sqrt(mean((test$BLACK_HC_PER_CAPITA_POP - test_pred_df5.2[["test_pred5.2"]])^2))
N_RMSE_5.2 <- RMSE_5.2 / (max(test$BLACK_HC_PER_CAPITA_POP) - min(test$BLACK_HC_PER_CAPITA_POP))
round(RMSE_5.2, digits = 3)
## [1] 8.784
round(N_RMSE_5.2, digits = 3)
## [1] 0.715
# 5. Anti-Asian hate crime: stepwise regression started from 12 predictors.
# RMSE on the held-out rows and RMSE normalized by the test-set response range.
test_pred6 <- predict(mod_asian_back, newdata = test)
test_pred_df6 <- as.data.frame(test_pred6)
RMSE_6 <- sqrt(mean((test[["ASIAN_HC_PER_CAPITA_POP"]] - test_pred_df6[["test_pred6"]])^2))
N_RMSE_6 <- RMSE_6 / diff(range(test[["ASIAN_HC_PER_CAPITA_POP"]]))
round(RMSE_6, digits = 3)
## [1] 0.164
round(N_RMSE_6, digits = 3)
## [1] 0.082
# 6. Anti-Asian hate crime: stepwise regression started from 10 predictors
#    (other hate-crime rates excluded).
# RMSE on the held-out rows and RMSE normalized by the test-set response range.
test_pred6.2 <- predict(mod_asian_back2, newdata = test)
test_pred_df6.2 <- as.data.frame(test_pred6.2)
# The prediction column is addressed by its full name, "test_pred6.2".
# Writing `$test_pred6` only works through `$` partial matching and would
# silently pick up a different column if one named `test_pred6` ever existed.
RMSE_6.2 <- sqrt(mean((test$ASIAN_HC_PER_CAPITA_POP - test_pred_df6.2[["test_pred6.2"]])^2))
N_RMSE_6.2 <- RMSE_6.2 / (max(test$ASIAN_HC_PER_CAPITA_POP) - min(test$ASIAN_HC_PER_CAPITA_POP))
round(RMSE_6.2, digits = 3)
## [1] 1.203
round(N_RMSE_6.2, digits = 3)
## [1] 0.599
# 7. All racial hate crime: kNN with 12 predictors.
# RMSE on the held-out rows and RMSE normalized by the test-set response range.
# `test_pred_df` is reused by the actual-vs-predicted plot further down.
test_pred <- predict(simple_fit3, newdata = test)
test_pred_df <- as.data.frame(test_pred)
RMSE_1 <- sqrt(mean((test[["ALL_HC_PER_CAPITA"]] - test_pred_df[["test_pred"]])^2))
N_RMSE_1 <- RMSE_1 / diff(range(test[["ALL_HC_PER_CAPITA"]]))
round(RMSE_1, digits = 3)
## [1] 8.099
round(N_RMSE_1, digits = 3)
## [1] 0.261
# 8. Anti-Black hate crime: kNN with 12 predictors.
# RMSE on the held-out rows and RMSE normalized by the test-set response range.
# `test_pred_df2` is reused by the actual-vs-predicted plot further down.
test_pred2 <- predict(simple_fit4, newdata = test)
test_pred_df2 <- as.data.frame(test_pred2)
RMSE_2 <- sqrt(mean((test[["BLACK_HC_PER_CAPITA_POP"]] - test_pred_df2[["test_pred2"]])^2))
N_RMSE_2 <- RMSE_2 / diff(range(test[["BLACK_HC_PER_CAPITA_POP"]]))
round(RMSE_2, digits = 3)
## [1] 3.539
round(N_RMSE_2, digits = 3)
## [1] 0.288
# 9. Anti-Asian hate crime: kNN with 12 predictors.
# RMSE on the held-out rows and RMSE normalized by the test-set response range.
# `test_pred_df3` is reused by the actual-vs-predicted plot further down.
test_pred3 <- predict(simple_fit5, newdata = test)
test_pred_df3 <- as.data.frame(test_pred3)
RMSE_3 <- sqrt(mean((test[["ASIAN_HC_PER_CAPITA_POP"]] - test_pred_df3[["test_pred3"]])^2))
N_RMSE_3 <- RMSE_3 / diff(range(test[["ASIAN_HC_PER_CAPITA_POP"]]))
round(RMSE_3, digits = 3)
## [1] 0.56
round(N_RMSE_3, digits = 3)
## [1] 0.279
# 4.2. Plot predicted (using knn) vs. actual value
# Actual vs. kNN-predicted all-race values for the held-out rows, drawn as
# side-by-side bars per test row.
# The x-axis index is each test row's position in DF2, derived from the split
# vector instead of a typed-in list, so it stays aligned with `test` if the
# seed, the split probabilities, or DF2 change.
test_rows <- as.factor(which(indices == 2))
t1 <- data.frame(hate_crime = test$ALL_HC_PER_CAPITA, state = test_rows, value = "actual")
t2 <- data.frame(hate_crime = test_pred_df[["test_pred"]], state = test_rows, value = "predicted")
total <- rbind(t1, t2)
# NOTE(review): the y-axis says "number of racial hate crime" but the plotted
# column is named *_PER_CAPITA — confirm the intended unit.
ggplot(data = total, aes(x = state, y = hate_crime, fill = value)) +
  geom_bar(stat = "identity", color = "black", position = position_dodge()) +
  theme_minimal() +
  scale_fill_manual(values = c(culer[1], rgb(1, 0, 0, .6))) +
  ggtitle("Actual vs. Predicted: All Racial Hate Crime") +
  xlab("state (numerical index)") +
  ylab("number of racial hate crime")

# Actual vs. kNN-predicted anti-Black values for the held-out rows, drawn as
# side-by-side bars per test row.
# The x-axis index is each test row's position in DF2, derived from the split
# vector instead of a typed-in list, so it stays aligned with `test` if the
# seed, the split probabilities, or DF2 change.
test_rows <- as.factor(which(indices == 2))
t3 <- data.frame(hate_crime = test$BLACK_HC_PER_CAPITA_POP, state = test_rows, value = "actual")
# The prediction column is addressed by its full name, "test_pred2";
# `$test_pred` only resolved through `$` partial matching.
t4 <- data.frame(hate_crime = test_pred_df2[["test_pred2"]], state = test_rows, value = "predicted")
total <- rbind(t3, t4)
# NOTE(review): the y-axis says "number of racial hate crime" but the plotted
# column is named *_PER_CAPITA_POP — confirm the intended unit.
ggplot(data = total, aes(x = state, y = hate_crime, fill = value)) +
  geom_bar(stat = "identity", color = "black", position = position_dodge()) +
  theme_minimal() +
  scale_fill_manual(values = c(culer[2], rgb(0, 0, 1, .6))) +
  ggtitle("Actual vs. Predicted: Anti-Black Hate Crime") +
  xlab("state (numerical index)") +
  ylab("number of racial hate crime")

# Actual vs. kNN-predicted anti-Asian values for the held-out rows, drawn as
# side-by-side bars per test row.
# The x-axis index is each test row's position in DF2, derived from the split
# vector instead of a typed-in list, so it stays aligned with `test` if the
# seed, the split probabilities, or DF2 change.
test_rows <- as.factor(which(indices == 2))
t5 <- data.frame(hate_crime = test$ASIAN_HC_PER_CAPITA_POP, state = test_rows, value = "actual")
# The prediction column is addressed by its full name, "test_pred3";
# `$test_pred` only resolved through `$` partial matching.
t6 <- data.frame(hate_crime = test_pred_df3[["test_pred3"]], state = test_rows, value = "predicted")
total <- rbind(t5, t6)
# NOTE(review): the y-axis says "number of racial hate crime" but the plotted
# column is named *_PER_CAPITA_POP — confirm the intended unit.
ggplot(data = total, aes(x = state, y = hate_crime, fill = value)) +
  geom_bar(stat = "identity", color = "black", position = position_dodge()) +
  theme_minimal() +
  scale_fill_manual(values = c(rgb(0, 0.8, 0.1, .6), rgb(0, 0.5, 0.2, .6))) +
  ggtitle("Actual vs. Predicted: Anti-Asian Hate Crime") +
  xlab("state (numerical index)") +
  ylab("number of racial hate crime")
